rm(list=ls())
library(tidyverse)

# Function to create datase with observations per muncode-year and time-variable columns 
center <-function(df,year_center){
  df <- df %>% 
    mutate(center = year - year_center) %>% 
    select(-year) %>%
    filter(between(center,-3,4)) %>% 
    mutate(center = str_replace(ifelse(center<0,paste0("t_minus",center),paste0("t_plus",center)),"-",""),
           center = str_replace(center,"t_plus0","t_0")) %>% 
    gather(variable, value, -c(mun_code,center)) %>%  
    unite(center,variable,center) %>%
    spread(center,value) %>% 
    mutate(year = year_center) 
  return(df)
}


# Prepare CNES HR data -----

# list files
cnes_hr_files <- list.files(here::here("data","raw","DATASUS","CNES","ESTABELECIMENTO_RH"),
                                    pattern = ".csv",
                                    full.names = T)

# Create a function to read and create the variables of interest from SIAB
read_cnes_hr <- function(file){
  
  # Read the csv file
  x <- read_delim(file, delim = ";",
                  skip = 4,
                  local = locale(encoding = "latin1"),
                  na = "-")
  
  # Some clean up
  x <- x %>% 
    select(municipio = Município ,total = Total) %>% 
    mutate(mun_code = str_sub(municipio,1,6),
           mun_name = str_sub(municipio,8)) %>% 
    filter(!is.na(as.numeric(mun_code))) %>% 
    mutate(docs = as.numeric(ifelse(is.na(total),0,total))) %>% 
    select(mun_code,mun_name,docs)
  
  # Obtain the year of the file
  year <- read_delim(file, delim = ";",
                     skip = 3,
                     n_max = 1,
                     col_names = F) %>% 
    pull(X1)  
  
  year <- str_extract(year,"[0-9]{4}")
  
  adm_level <- read_delim(file, delim = ";",
                               skip = 4,
                               n_max = 1,
                               col_names = F,
                               local = locale(encoding = "latin1")) %>% 
    pull(X1)
  
  adm_level <- str_to_lower(stringi::stri_trans_general(adm_level,"latin-ascii"))
  
  # Create year and level of administration variable in the dataset
  x <- x %>% 
    mutate(year = as.numeric(year),
           adm_level = adm_level) %>% 
    relocate(mun_code,mun_name,year)
  
  return(x)
  
}

# read files into R
cnes_hr_data <- map_df(cnes_hr_files,
                          read_cnes_hr)

# Check number of municipalities per year
cnes_hr_data %>% 
  group_by(year) %>% 
  summarise(n_obs = n())

# Check if all data from municipality
cnes_hr_data %>% distinct(adm_level)

# Prepare CNES ESTAB data -----

# list files
cnes_estab_files <- list.files(here::here("data","raw","DATASUS","CNES","ESTABELECIMENTO_NIVEL_ATENCAO"),
                            pattern = ".csv",
                            full.names = T)

# Create a function to read and create the variables of interest from SIAB
read_cnes_estab <- function(file){
  
  # Read the csv file
  x <- read_delim(file, delim = ";",
                  skip = 3,
                  local = locale(encoding = "latin1"),
                  na = "-")
  
  # Some clean up
  x <- x %>% 
    rename_all(~str_to_lower(stringi::stri_trans_general(.,"latin-ascii"))) %>% 
    mutate(mun_code = str_sub(municipio,1,6),
           mun_name = str_sub(municipio,8)) %>% 
    filter(!is.na(as.numeric(mun_code))) %>% 
    rename(clinic_basic = `ambulatorial_-_basica_municipal`,
           clinic_medium = `amb_-_media_complex_municipal`,
           clinic_high = `amb_-_alta_complex_municipal`,
           hospital_medium = `hosp_-_media_complex_municipal`,
           hospital_high = `hosp_-_alta_complex_municipal`) %>% 
    filter(!is.na(as.numeric(mun_code))) %>% 
    mutate(across(clinic_basic:hospital_high,~ifelse(is.na(.),0,.))) %>% 
    mutate(clinic_total = clinic_basic+clinic_medium+clinic_high) %>% 
    select(mun_code,mun_name,starts_with("clinic"),starts_with("hospital"))  
  
  # Obtain the year of the file
  year <- read_delim(file, delim = ";",
                     skip = 2,
                     n_max = 1,
                     col_names = F) %>% 
    pull(X1)  
  
  year <- str_extract(year,"[0-9]{4}")
  
  # Create year  variable in the dataset
  x <- x %>% 
    mutate(year = as.numeric(year)) %>% 
    relocate(mun_code,mun_name,year)
  
  return(x)
  
}

# read files into R
cnes_estab_data <- map_df(cnes_estab_files,
                       read_cnes_estab)

# Check number of municipalities per year
cnes_estab_data %>% 
  group_by(year) %>% 
  summarise(n_obs = n())



# Read Population
mun_pop <- read_rds(here::here("data","processed","citycharacteristics","pop_mun.rds")) %>% 
  select(-mun_name) %>% 
  filter(between(year,2005,2016))

# Join
cnes_estab_data <- cnes_estab_data %>% 
  select(-mun_name)

cnes_hr_data <- cnes_hr_data %>% 
  select(-mun_name,-adm_level)

cnes <- mun_pop %>% 
  left_join(cnes_hr_data, by = c("mun_code","year")) %>% 
  left_join(cnes_estab_data, by = c("mun_code","year"))

cnes <- cnes %>% 
  mutate(docs = ifelse(is.na(docs),0,docs)) %>% 
  mutate(across(docs:hospital_high,~(./mun_pop*100000),.names = "{.col}_100K")) 
  

# Prepare Infant Mortality -----

live_births <- read_delim(here::here("data","raw","DATASUS","nascimentos_municipios_res_mae.csv"),
                          delim = ";",
                          skip = 3,
                          local = locale(encoding = "latin1"),
                          na = "-")

live_births <- live_births %>% 
  mutate(mun_code = str_sub(Município,1,6),
       mun_name = str_sub(Município,8)) %>% 
  filter(!is.na(as.numeric(mun_code))) %>% 
  select(-Município,-Total)

live_births <- live_births %>% 
  pivot_longer(cols = 1:16, names_to = "year", values_to = "live_births") %>% 
  mutate(year = as.numeric(year),
         live_births = ifelse(is.na(live_births),0,live_births))

infant_deaths <- read_delim(here::here("data","raw","DATASUS","obitos_infantis_municipios_res_mae.csv"),
                            delim = ";",
                            skip = 3,
                            local = locale(encoding = "latin1"),
                            na = "-")

infant_deaths <- infant_deaths %>% 
  mutate(mun_code = str_sub(Município,1,6),
         mun_name = str_sub(Município,8)) %>% 
  filter(!is.na(as.numeric(mun_code))) %>% 
  select(-Município,-Total)

infant_deaths <- infant_deaths %>% 
  pivot_longer(cols = 1:16, names_to = "year", values_to = "infant_deaths") %>% 
  mutate(year = as.numeric(year),
         infant_deaths = ifelse(is.na(infant_deaths),0,infant_deaths))

infant_mortality <- infant_deaths %>% 
  full_join(live_births, by = c("mun_code","mun_name","year"))

infant_mortality <- infant_mortality %>% 
  mutate(infant_mort_rate = infant_deaths/live_births*1000)

# Eliminate cities that do not exist (ignored in the raw files)
infant_mortality <- infant_mortality %>% 
  filter(!str_detect(mun_name,"Município ignorado"))

infant_mortality %>% group_by(year) %>% summarise(n_obs = n())

# Prepare ESF -----

ESF <- read_delim(here::here("data","raw","DATASUS","ESF_saude_familia_municipio.csv"),
                          delim = ";",
                          skip = 5,
                          local = locale(encoding = "latin1"),
                          na = "-")
ESF <- ESF %>% 
  mutate(mun_code = str_sub(Município,1,6),
         mun_name = str_sub(Município,8)) %>% 
  filter(!is.na(as.numeric(mun_code))) %>% 
  select(mun_code,contains("dez"))

ESF <- ESF %>% 
  pivot_longer(cols = ends_with("Dez"), names_to = "year", values_to = "number_esf_team") %>% 
  mutate(year = as.numeric(str_extract(year,"[0-9]{4}")),
         number_esf_team = ifelse(is.na(number_esf_team),0,number_esf_team)) %>% 
  filter(year <= 2016)

ESF <- ESF %>% 
  left_join(mun_pop, by = c("mun_code","year")) %>% 
  mutate(esf_100K = number_esf_team/mun_pop*100000) %>% 
  select(mun_code,year,esf_100K)

# Check number of municipalities per year
ESF %>% 
  group_by(year) %>% 
  summarise(n_obs = n())

# Create health outcomes df ----

cnes_health_outcomes <- cnes %>% 
  select(mun_code,year,ends_with("100K"))

infant_mortality <- infant_mortality %>% 
  select(mun_code,year,infant_mort_rate)

health_outcomes <- cnes_health_outcomes %>% 
  full_join(infant_mortality, by = c("mun_code","year")) %>% 
  full_join(ESF, by = c("mun_code","year") )

health_outcomes %>% group_by(year) %>% summarise(n_obs = n())

# Center health outcomes

health_outcomes <-  map(seq(2004,2012, by = 4), center, df = health_outcomes) %>%
  bind_rows() %>% 
  select(mun_code,year, everything()) %>%  
  mutate_at(vars(matches("_t_")),
            .funs = funs(as.numeric(.)))

#Save

write_rds(x = health_outcomes,path = here::here("data","processed","welfare_outcomes","health_outcomes.rds"))
